Making Backyards Affordable for All: A YIMBY Analysis
Author
Matthew Rivera
Published
October 23, 2025
Introduction
Housing affordability remains one of the most pressing challenges facing American cities today. This analysis examines metropolitan areas across the United States to identify “YIMBY” (Yes In My Backyard) success stories—cities that have effectively addressed housing affordability through permissive building policies. Using data from the US Census Bureau and Bureau of Labor Statistics, we develop metrics to measure rent burden and housing growth, ultimately identifying which cities have made meaningful progress in making housing more affordable.
Data Acquisition
Task 1: Data Import
Code
# First time only - install keytidycensus::census_api_key("c71b785213dc9e92a64a6453a1dc1bf0a47b3594", overwrite =TRUE, install =TRUE)
[1] "c71b785213dc9e92a64a6453a1dc1bf0a47b3594"
Code
if(!dir.exists(file.path("data", "mp02"))){dir.create(file.path("data", "mp02"), showWarnings=FALSE, recursive=TRUE)}library <-function(pkg){## Mask base::library() to automatically install packages if needed## Masking is important here so downlit picks up packages and links## to documentation pkg <-as.character(substitute(pkg))options(repos =c(CRAN ="https://cloud.r-project.org"))if(!require(pkg, character.only=TRUE, quietly=TRUE)) install.packages(pkg)stopifnot(require(pkg, character.only=TRUE, quietly=TRUE))}library(tidyverse)library(glue)library(readxl)library(tidycensus)get_acs_all_years <-function(variable, geography="cbsa",start_year=2009, end_year=2023){ fname <-glue("{variable}_{geography}_{start_year}_{end_year}.csv") fname <-file.path("data", "mp02", fname)if(!file.exists(fname)){ YEARS <-seq(start_year, end_year) YEARS <- YEARS[YEARS !=2020] # Drop 2020 - No survey (covid) ALL_DATA <-map(YEARS, function(yy){ tidycensus::get_acs(geography, variable, year=yy, survey="acs1") |>mutate(year=yy) |>select(-moe, -variable) |>rename(!!variable := estimate) }) |>bind_rows()write_csv(ALL_DATA, fname) }read_csv(fname, show_col_types=FALSE)}# Household income (12 month)INCOME <-get_acs_all_years("B19013_001") |>rename(household_income = B19013_001)# Monthly rentRENT <-get_acs_all_years("B25064_001") |>rename(monthly_rent = B25064_001)# Total populationPOPULATION <-get_acs_all_years("B01003_001") |>rename(population = B01003_001)# Total number of householdsHOUSEHOLDS <-get_acs_all_years("B11001_001") |>rename(households = B11001_001)
library(httr2)library(rvest)get_bls_qcew_annual_averages <-function(start_year=2009, end_year=2023){ fname <-glue("bls_qcew_{start_year}_{end_year}.csv.gz") fname <-file.path("data", "mp02", fname) YEARS <-seq(start_year, end_year) YEARS <- YEARS[YEARS !=2020] # Drop Covid year to match ACSif(!file.exists(fname)){ ALL_DATA <-map(YEARS, .progress=TRUE, possibly(function(yy){ fname_inner <-file.path("data", "mp02", glue("{yy}_qcew_annual_singlefile.zip"))if(!file.exists(fname_inner)){request("https://www.bls.gov") |>req_url_path("cew", "data", "files", yy, "csv",glue("{yy}_annual_singlefile.zip")) |>req_headers(`User-Agent`="Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:143.0) Gecko/20100101 Firefox/143.0") |>req_retry(max_tries=5) |>req_perform(fname_inner) }if(file.info(fname_inner)$size <755e5){warning(sQuote(fname_inner), "appears corrupted. Please delete and retry this step.") }read_csv(fname_inner, show_col_types=FALSE) |>mutate(YEAR = yy) |>select(area_fips, industry_code, annual_avg_emplvl, total_annual_wages, YEAR) |>filter(nchar(industry_code) <=5, str_starts(area_fips, "C")) |>filter(str_detect(industry_code, "-", negate=TRUE)) |>mutate(FIPS = area_fips, INDUSTRY =as.integer(industry_code), EMPLOYMENT =as.integer(annual_avg_emplvl), TOTAL_WAGES = total_annual_wages) |>select(-area_fips, -industry_code, -annual_avg_emplvl, -total_annual_wages) |># 10 is a special value: "all industries" , so omitfilter(INDUSTRY !=10) |>mutate(AVG_WAGE = TOTAL_WAGES / EMPLOYMENT) })) |>bind_rows()write_csv(ALL_DATA, fname) } ALL_DATA <-read_csv(fname, show_col_types=FALSE) ALL_DATA_YEARS <-unique(ALL_DATA$YEAR) YEARS_DIFF <-setdiff(YEARS, ALL_DATA_YEARS)if(length(YEARS_DIFF) >0){stop("Download failed for the following years: ", YEARS_DIFF, ". Please delete intermediate files and try again.") } ALL_DATA}WAGES <-get_bls_qcew_annual_averages()
Data Structure Overview
erDiagram
INCOME {
varchar GEOID PK "CBSA code"
varchar NAME "CBSA name"
int year PK "2009-2023, excl 2020"
decimal household_income "Median annual income"
}
RENT {
varchar GEOID PK "CBSA code"
varchar NAME
int year PK "2009-2023, excl 2020"
decimal monthly_rent "Median gross rent"
}
POPULATION {
varchar GEOID PK "CBSA code"
varchar NAME
int year PK "2009-2023, excl 2020"
int population "Total population"
}
HOUSEHOLDS {
varchar GEOID PK "CBSA code"
varchar NAME
int year PK "2009-2023, excl 2020"
int households "Total households"
}
PERMITS {
int CBSA PK "CBSA code as integer"
int year PK "2009-2023, all years"
int new_housing_units_permitted
}
WAGES {
varchar FIPS PK "CBSA with C prefix"
int INDUSTRY PK "NAICS code"
int YEAR PK "2009-2023, excl 2020"
int EMPLOYMENT
decimal TOTAL_WAGES
decimal AVG_WAGE
}
INDUSTRY_CODES {
varchar level4_code PK "5-digit NAICS"
varchar level4_title
varchar level3_code "4-digit NAICS"
varchar level3_title
varchar level2_code "3-digit NAICS"
varchar level2_title
varchar level1_code "2-digit sector"
varchar level1_title
}
%% Direct Census ACS joins (GEOID, year)
INCOME ||--|| RENT : "GEOID, year"
INCOME ||--|| POPULATION : "GEOID, year"
INCOME ||--|| HOUSEHOLDS : "GEOID, year"
RENT ||--|| POPULATION : "GEOID, year"
RENT ||--|| HOUSEHOLDS : "GEOID, year"
POPULATION ||--|| HOUSEHOLDS : "GEOID, year"
%% Complex joins requiring transformation
PERMITS }o--|| INCOME : "int(GEOID)"
PERMITS }o--|| POPULATION : "int(GEOID)"
WAGES }o--|| INCOME : "remove C prefix"
WAGES }o--|| POPULATION : "remove C prefix"
%% Industry code lookup
WAGES }o--o{ INDUSTRY_CODES : "INDUSTRY code"
We have successfully imported five main datasets:
INCOME: Household income by CBSA and year
RENT: Monthly rent by CBSA and year
POPULATION: Total population by CBSA and year
HOUSEHOLDS: Number of households by CBSA and year
PERMITS: New housing units permitted by CBSA and year
WAGES: Employment and wage data by CBSA, industry, and year
INDUSTRY_CODES: NAICS industry code lookup table
Key Join Keys: - Census data uses GEOID (as character) and NAME (CBSA name) - BLS data uses FIPS (CBSA code as “C12345”) - Permits data uses CBSA (as integer) - All datasets include year for temporal joins
Data Integration and Initial Exploration
Task 2: Multi-Table Questions
Question 1: Which CBSA (by name) permitted the largest number of new housing units in the decade from 2010 to 2019 (inclusive)?
Code
# Load DT if not already loaded (add to setup chunk or here)library(DT)# Which CBSA permitted the most new housing units from 2010-2019?q1_result <- PERMITS |>filter(year >=2010, year <=2019) |>group_by(CBSA) |>summarize(total_permits =sum(new_housing_units_permitted, na.rm =TRUE)) |>arrange(desc(total_permits)) |>slice(1) |>left_join(POPULATION |>select(GEOID, NAME) |>distinct(),by =c("CBSA"="GEOID"))# Display resultq1_result |>select(NAME, total_permits) |> DT::datatable(caption ="CBSA with Most Housing Permits (2010-2019)",options =list(pageLength =5))
Answer: The Houston-Sugar Land-Baytown, TX Metro Area CBSA permitted the largest number of new housing units between 2010 and 2019, with a total of 482,075 units.
Question 2: In what year did Albuquerque, NM (CBSA Number 10740) permit the most new housing units?
Code
# In what year did Albuquerque permit the most new housing units?q2_result <- PERMITS |>filter(CBSA ==10740) |>arrange(desc(new_housing_units_permitted)) |>slice(1)# Show all years for contextalbuquerque_permits <- PERMITS |>filter(CBSA ==10740) |>arrange(year)# Displayalbuquerque_permits |>datatable(caption ="Albuquerque Housing Permits by Year",options =list(pageLength =15))
Answer: Albuquerque permitted the most new housing units in 2021, with 4,021 permits issued.
Note: Be careful about the COVID-19 data artifact mentioned in the instructions.
Question 3: Which state (not CBSA) had the highest average individual income in 2015? To answer this question, you will need to first compute the total income per CBSA by multiplying the average household income by the number of households, and then sum total income and total population across all CBSAs in a state. With these numbers, you can answer this question.
Code
# Create state abbreviation lookupstate_df <-data.frame(abb =c(state.abb, "DC", "PR"),name =c(state.name, "District of Columbia", "Puerto Rico"))# Calculate state-level average individual income for 2015# Assumes INCOME has 'household_income' (average household income),# HOUSEHOLDS has 'households' (number of households),# and POPULATION has 'population' (total population),# all joined by GEOID, NAME, yearq3_result <- INCOME |>filter(year ==2015) |>left_join(HOUSEHOLDS |>filter(year ==2015), by =c("GEOID", "NAME", "year")) |>left_join(POPULATION |>filter(year ==2015), by =c("GEOID", "NAME", "year")) |>mutate(state =str_extract(NAME, ", (.{2})", group =1),total_income = household_income * households) |>group_by(state) |>summarize(total_income =sum(total_income, na.rm =TRUE),total_population =sum(population, na.rm =TRUE),avg_individual_income = total_income / total_population ) |>arrange(desc(avg_individual_income)) |>left_join(state_df, by =c("state"="abb"))# Display top 10q3_result |>slice(1:10) |>select(name, state, avg_individual_income, total_population) |>mutate(avg_individual_income = scales::dollar(avg_individual_income)) |> DT::datatable(caption ="States by Average Individual Income (2015)",options =list(pageLength =15))
Answer: Massachusetts had the highest average individual income in 2015 at 2.7620622^{4}.
Question 4: Data scientists and business analysts are recorded under NAICS code 5182. What is the last year in which the NYC CBSA had the most data scientists in the country? In recent, the San Francisco CBSA has had the most data scientists.
Code
# Data scientists are NAICS code 5182# Need to standardize CBSA codes between Census and BLS datadata_scientists <- WAGES |>filter(INDUSTRY ==5182) |>mutate(std_cbsa =paste0(FIPS, "0")) |>select(std_cbsa, YEAR, EMPLOYMENT)# Find which CBSA had most data scientists each yearq4_result <- data_scientists |>group_by(YEAR) |>slice_max(EMPLOYMENT, n =1) |>ungroup() |>left_join( POPULATION |>mutate(std_cbsa =paste0("C", GEOID)) |>select(std_cbsa, NAME) |>distinct(),by ="std_cbsa" )# Displayq4_result |>select(YEAR, NAME, EMPLOYMENT) |>datatable(caption ="CBSA with Most Data Scientists by Year",options =list(pageLength =15))
Answer: NYC last had the most data scientists in 2015.
Question 5: What fraction of total wages in the NYC CBSA was earned by people employed in the finance and insurance industries (NAICS code 52)? In what year did this fraction peak?
Answer: The finance and insurance sector peaked at of total NYC wages in 2014.
Task 3: Initial Visualizations
Visualization 1: Rent vs. Household Income (2009)
Code
# Join rent and income data for 2009rent_income_2009 <- RENT |>filter(year ==2009) |>inner_join(INCOME |>filter(year ==2009), by =c("GEOID", "NAME", "year"))# Create scatter plotggplot(rent_income_2009, aes(x = household_income, y = monthly_rent)) +geom_point(alpha =0.6, color ="steelblue", size =2) +geom_smooth(method ="lm", color ="darkred", se =TRUE) +scale_x_continuous(labels = scales::dollar_format(), limits =c(0, NA)) +scale_y_continuous(labels = scales::dollar_format(),limits =c(0, NA)) +labs(title ="Relationship Between Household Income and Monthly Rent",subtitle ="Core-Based Statistical Areas (CBSAs) in 2009",x ="Average Household Income (Annual)",y ="Average Monthly Rent",caption ="Source: US Census Bureau, American Community Survey" ) +theme_minimal(base_size =12) +theme(plot.title =element_text(face ="bold", size =14),panel.grid.minor =element_blank() )
Interpretation: There is a strong positive relationship between household income and monthly rent across CBSAs. Higher-income areas tend to have higher rents, suggesting that housing costs scale with local economic conditions.
Visualization 2: Total vs. Healthcare Employment Over Time
Code
# Load required librarieslibrary(ggplot2)library(scales) # For comma_format()library(viridis) # For scale_color_viridis_c() (if not already loaded)library(dplyr) # For mutate, group_by, summarize (if not already loaded)# Healthcare is NAICS code 62healthcare_employment <- WAGES |>mutate(is_healthcare = INDUSTRY ==62,std_cbsa =paste0(FIPS, "0")) |>group_by(std_cbsa, YEAR) |>summarize(healthcare_employment =sum(EMPLOYMENT[is_healthcare], na.rm =TRUE),total_employment =sum(EMPLOYMENT, na.rm =TRUE),.groups ="drop" ) |>filter(total_employment >0, healthcare_employment >0)# Create scatter plot with time progressionggplot(healthcare_employment, aes(x = total_employment, y = healthcare_employment, color = YEAR)) +geom_point(alpha =0.5, size =1.5) +scale_color_viridis_c(option ="plasma") +scale_x_log10(labels = scales::comma_format()) +scale_y_log10(labels = scales::comma_format()) +labs(title ="Healthcare Employment vs. Total Employment Across CBSAs",subtitle ="Evolution from 2009-2023 (log-log scale)",x ="Total Employment (log scale)",y ="Healthcare & Social Services Employment (log scale)",color ="Year",caption ="Source: Bureau of Labor Statistics, Quarterly Census of Employment and Wages" ) +theme_minimal(base_size =12) +theme(plot.title =element_text(face ="bold", size =14),legend.position ="right",panel.grid.minor =element_blank() )
Interpretation: Healthcare employment grows proportionally with total employment across CBSAs. The color gradient shows the temporal evolution, with more recent years showing slightly higher healthcare employment shares.
Visualization 3: Household Size Evolution Over Time
Code
# Calculate household size (persons per household)household_size <- POPULATION |>inner_join(HOUSEHOLDS, by =c("GEOID", "NAME", "year")) |>mutate(household_size = population / households) |>filter(!is.na(household_size), household_size >0)# Select top 20 CBSAs by 2019 population for readabilitytop_cbsas <- POPULATION |>filter(year ==2019) |>slice_max(population, n =20) |>pull(GEOID)household_size_subset <- household_size |>filter(GEOID %in% top_cbsas) |>mutate(highlight = NAME %in%c("New York-Newark-Jersey City, NY-NJ-PA Metro Area","Los Angeles-Long Beach-Anaheim, CA Metro Area"))# Load gghighlightlibrary(gghighlight)# Create line plot with highlightingggplot(household_size_subset, aes(x = year, y = household_size, group = NAME, color = NAME)) +geom_line(linewidth =0.8) +gghighlight(highlight, use_direct_label =TRUE,label_key = NAME,label_params =list(size =3.5, nudge_x =0.5)) +scale_y_continuous(limits =c(2, 3.5)) +labs(title ="Evolution of Average Household Size Over Time",subtitle ="Top 20 largest US metropolitan areas (2009-2023) - NYC and LA highlighted",x ="Year",y ="Average Household Size (persons per household)",caption ="Source: US Census Bureau, American Community Survey\nNote: 2020 data unavailable due to COVID-19" ) +theme_minimal(base_size =12) +theme(plot.title =element_text(face ="bold", size =14),panel.grid.minor =element_blank(),legend.position ="none" )
Interpretation: Household sizes have remained relatively stable over time, hovering around 2.5-3.0 persons per household, with modest variation across metropolitan areas.
Building Indices
Task 4: Rent Burden Index
Code
# Join income and rent datarent_burden_data <- INCOME |>inner_join(RENT, by =c("GEOID", "NAME", "year")) |>mutate(# Monthly incomemonthly_income = household_income /12,# Rent to income ratiorent_to_income = monthly_rent / monthly_income,# Convert to percentagerent_burden_pct = rent_to_income *100 )# Calculate baseline (national average across all years)baseline_rent_burden <-mean(rent_burden_data$rent_burden_pct, na.rm =TRUE)# Create standardized rent burden index# 0 = national average, positive = higher burden, negative = lower burden# Scaled by standard deviationrent_burden_sd <-sd(rent_burden_data$rent_burden_pct, na.rm =TRUE)rent_burden_data <- rent_burden_data |>mutate(rent_burden_index = (rent_burden_pct - baseline_rent_burden) / rent_burden_sd *10+50,# Ensure index is between 0 and 100rent_burden_index =pmax(0, pmin(100, rent_burden_index)) )
Code
# Table 1: Single metro area over time (NYC)nyc_rent_burden <- rent_burden_data |>filter(str_detect(NAME, "New York-Newark-Jersey")) |>select(Year = year, NAME, `Monthly Rent`= monthly_rent, `Household Income`= household_income, `Rent Burden %`= rent_burden_pct, `Rent Burden Index`= rent_burden_index) |>mutate(`Monthly Rent`= scales::dollar(`Monthly Rent`),`Household Income`= scales::dollar(`Household Income`),`Rent Burden %`=round(`Rent Burden %`, 1),`Rent Burden Index`=round(`Rent Burden Index`, 1) )datatable(nyc_rent_burden, caption ="Rent Burden Evolution: New York-Newark-Jersey City",options =list(pageLength =15))
New York Metro Area Trends (2013-2023)
Rent burden improved slightly from 22.6% (2013) to 22.2% (2023)
The Rent Burden Index decreased from 59.2 to 58.1, indicating modest improvement relative to national standards
Best year: 2019 (21.4% burden, 55.5 index) - likely due to strong income growth
Worst year: 2014 (22.9% burden, 60.3 index)
COVID impact: 2021 saw a spike to 22.7% as rents jumped faster than incomes
Key Insight: High-burden cities show classic NIMBY patterns - demand exceeds supply due to restrictive development.
Task 5: Housing Growth Index
Code
# Join population and permitshousing_growth_data <- POPULATION |>inner_join(PERMITS, by =c("GEOID"="CBSA", "year")) |>arrange(GEOID, year) |>group_by(GEOID) |>mutate(# 5-year lagged population for growth calculationpop_5yr_ago =lag(population, 5),# 5-year population growthpop_growth_5yr = population - pop_5yr_ago,pop_growth_rate_5yr = (population - pop_5yr_ago) / pop_5yr_ago ) |>ungroup()# Filter to years where we have 5-year lookback (2014+)housing_growth_data <- housing_growth_data |>filter(year >=2014, !is.na(pop_growth_5yr))# Metric 1: Instantaneous housing growth# Measures current construction activity relative to existing population# Permits per 1000 residents captures how aggressively a city is building# independent of whether population is growing or shrinkinghousing_growth_data <- housing_growth_data |>mutate(permits_per_1000 = (new_housing_units_permitted / population) *1000 )# Standardize metric 1 to 0-100 scale (mean=50, sd=10)mean_perm_per_1000 <-mean(housing_growth_data$permits_per_1000, na.rm =TRUE)sd_perm_per_1000 <-sd(housing_growth_data$permits_per_1000, na.rm =TRUE)housing_growth_data <- housing_growth_data |>mutate(instant_housing_index = (permits_per_1000 - mean_perm_per_1000) / sd_perm_per_1000 *10+50,instant_housing_index =pmax(0, pmin(100, instant_housing_index)) )# Metric 2: Rate-based housing growth# Measures whether housing supply keeps pace with population increases# A ratio > 1 means more housing units than new residents (good for affordability)# Only calculated for growing metros (negative growth would give misleading ratios)housing_growth_data <- housing_growth_data |>mutate(permits_to_growth_ratio =ifelse(pop_growth_5yr >0, new_housing_units_permitted / pop_growth_5yr,NA) )# Standardize metric 2 to 0-100 scalemean_ratio <-mean(housing_growth_data$permits_to_growth_ratio, na.rm =TRUE)sd_ratio <-sd(housing_growth_data$permits_to_growth_ratio, na.rm =TRUE)housing_growth_data <- housing_growth_data |>mutate(rate_housing_index = (permits_to_growth_ratio - mean_ratio) / sd_ratio *10+50,rate_housing_index =pmax(0, pmin(100, rate_housing_index)) )# Composite score (weighted average)# 60% weight on instantaneous metric: Prioritizes absolute construction activity# - Rewards cities actively building regardless of population trends# - Important for increasing overall housing stock# 40% weight on rate-based metric: Ensures housing keeps pace with growth# - Prevents scenarios where cities build a lot but still fall behind demand# - Balances absolute construction with responsiveness to population changes# This weighting balances "building enough" with "building appropriately for growth"housing_growth_data <- housing_growth_data |>mutate(composite_housing_index =0.6* instant_housing_index +0.4* rate_housing_index )
Code
# High scorers on instantaneous metricinstant_high <- housing_growth_data |>filter(year ==max(year)) |>slice_max(instant_housing_index, n =10) |>select(CBSA = NAME, Year = year, `Permits per 1000`= permits_per_1000,`Instant Index`= instant_housing_index)datatable(instant_high, caption ="Top 10 CBSAs: Instantaneous Housing Growth (2023)")
Instantaneous Growth (2023)
Top performers building aggressively:
Myrtle Beach, SC: 33.1 permits per 1,000 residents (100 index)
Salisbury, MD: 37.7 permits per 1,000 (100 index)
Punta Gorda, FL: 21.5 permits per 1,000 (99.1 index)
Pattern: Coastal resort/retirement destinations building rapidly to meet demand.
Code
# High scorers on rate-based metricrate_high <- housing_growth_data |>filter(year ==max(year), !is.na(rate_housing_index)) |>slice_max(rate_housing_index, n =10) |>select(CBSA = NAME, Year = year, `Permits/Growth Ratio`= permits_to_growth_ratio,`Rate Index`= rate_housing_index)datatable(rate_high, caption ="Top 10 CBSAs: Rate-Based Housing Growth (2023)")
Rate-Based Growth (Efficiency)
Most efficient builders (permits relative to population growth):
Springfield, OH: 4.06 permits per growth unit (71.0 index)
Urban Honolulu, HI: 2.44 permits per growth unit (62.1 index)
Key Insight: These cities build MORE housing than population growth requires, actively creating affordability buffers.
Code
# High scorers on compositecomposite_high <- housing_growth_data |>filter(year ==max(year)) |>slice_max(composite_housing_index, n =15) |>select(CBSA = NAME, Year = year,`Instant Index`= instant_housing_index,`Rate Index`= rate_housing_index,`Composite Index`= composite_housing_index) |>mutate(across(ends_with("Index"), ~round(., 1)))datatable(composite_high, caption ="Top 15 CBSAs: Composite Housing Growth (2023)")
YIMBY Success Stories (89 cities!)Characteristics of success:
Started with high rent burden (50-72%) Reduced burden by 2-9 percentage points Achieved through strong housing growth (55-68 composite index) Sustained population growth (3-23%)
Standout Examples:
Hinesville, GA: Reduced burden by 9.3 points (72.4% → 63.1%) Lake
Havasu, AZ: Reduced burden by 8.2 points (67.4% → 59.1%)
Athens, GA: Reduced burden by 6.4 points while growing 7.1%
Gainesville, FL: Maintained growth (23%) while reducing burden despite starting at 69.7%
Formula for success: Build housing faster than population grows → rents stabilize → affordability improves.
Nevada’s Las Vegas (64.9 burden, 53.0 growth): Growing but not keeping pace with demand.
** .gitignoreKey Insight:** These cities have high demand, high rents, BUT restrictive zoning/permitting prevents adequate housing supply → affordability crisis persists.
Visualization 1: Rent Burden vs Housing Growth
Code
# Recent period comparisonviz_data_recent <- yimby_data |>filter(year >=2019) |>group_by(GEOID, NAME) |>summarize(avg_rent_burden =mean(rent_burden_index, na.rm =TRUE),avg_housing_growth =mean(composite_housing_index, na.rm =TRUE),.groups ="drop" ) |>filter(!is.na(avg_rent_burden), !is.na(avg_housing_growth))# Identify quadrantsviz_data_recent <- viz_data_recent |>mutate(quadrant =case_when( avg_rent_burden >50& avg_housing_growth >50~"High Rent, High Growth (YIMBY Success)", avg_rent_burden >50& avg_housing_growth <=50~"High Rent, Low Growth (NIMBY)", avg_rent_burden <=50& avg_housing_growth >50~"Low Rent, High Growth",TRUE~"Low Rent, Low Growth" ) )ggplot(viz_data_recent, aes(x = avg_rent_burden, y = avg_housing_growth)) +geom_hline(yintercept =50, linetype ="dashed", color ="gray50") +geom_vline(xintercept =50, linetype ="dashed", color ="gray50") +geom_point(aes(color = quadrant), alpha =0.6, size =2.5) +scale_color_manual(values =c("High Rent, High Growth (YIMBY Success)"="darkgreen","High Rent, Low Growth (NIMBY)"="darkred","Low Rent, High Growth"="steelblue","Low Rent, Low Growth"="gray60"),name ="Category" ) +labs(title ="Rent Burden vs Housing Growth Index (2019-2023 Average)",subtitle ="Identifying YIMBY Success Stories and NIMBY Problem Areas",x ="Rent Burden Index (50 = National Average)",y ="Housing Growth Index (50 = National Average)",caption ="Source: U.S. Census Bureau, QCEW" ) +theme_minimal(base_size =12) +theme(plot.title =element_text(face ="bold"),legend.position ="bottom" ) +guides(color =guide_legend(ncol =2))
Visualization 2: Rent Burden Change Over Time
Code
# Track top YIMBY and NIMBY cities over timetop_yimby_names <- yimby_winners |>head(5) |>pull(CBSA)top_nimby_names <- nimby_cities |>head(5) |>pull(CBSA)viz_time_data <- yimby_data |>filter(NAME %in%c(top_yimby_names, top_nimby_names)) |>mutate(category =ifelse(NAME %in% top_yimby_names, "YIMBY Success", "NIMBY Challenge") )ggplot(viz_time_data, aes(x = year, y = rent_burden_index, color = NAME, linetype = category)) +geom_line(size =1) +geom_hline(yintercept =50, linetype ="dotted", color ="black") +scale_linetype_manual(values =c("YIMBY Success"="solid", "NIMBY Challenge"="dashed"),name ="City Type") +labs(title ="Evolution of Rent Burden: YIMBY Success vs NIMBY Challenge Cities",subtitle ="Tracking top 5 cities in each category",x ="Year",y ="Rent Burden Index (50 = National Average)",color ="Metropolitan Area",caption ="Source: U.S. Census Bureau, American Community Survey" ) +theme_minimal(base_size =11) +theme(plot.title =element_text(face ="bold"),legend.position ="right",legend.text =element_text(size =8) )
Task 7: Policy Brief
Policy Brief: The Affordable Backyards Act
Code
# Identify sponsor citiessponsor_yimby <- yimby_winners |>slice(1) |>pull(CBSA)sponsor_nimby <- nimby_cities |>slice(1) |>pull(CBSA)# Extract key industries for each city# Get CBSA FIPS for sponsorssponsor_yimby_fips <- yimby_data |>filter(NAME == sponsor_yimby) |>pull(GEOID) |>first()sponsor_nimby_fips <- yimby_data |>filter(NAME == sponsor_nimby) |>pull(GEOID) |>first()# Find important industries in both citiesindustry_analysis <- WAGES |>mutate(std_cbsa =paste0("C", str_sub(FIPS, 2, -2))) |>filter(std_cbsa %in%c(paste0("C", sponsor_yimby_fips), paste0("C", sponsor_nimby_fips)), YEAR ==2023) |># Convert INDUSTRY to character to match level2_codemutate(INDUSTRY_char =as.character(INDUSTRY)) |>left_join(INDUSTRY_CODES, by =c("INDUSTRY_char"="level2_code")) |>group_by(std_cbsa, INDUSTRY, level2_title) |>summarize(total_emp =sum(EMPLOYMENT, na.rm =TRUE),avg_wage =mean(AVG_WAGE, na.rm =TRUE),.groups ="drop" ) |>filter(total_emp >1000, !is.na(level2_title)) |># Filter out NAsarrange(std_cbsa, desc(total_emp))# Top industries in YIMBY cityyimby_industries <- industry_analysis |>filter(std_cbsa ==paste0("C", sponsor_yimby_fips)) |>head(10)# Top industries in NIMBY city nimby_industries <- industry_analysis |>filter(std_cbsa ==paste0("C", sponsor_nimby_fips)) |>head(10)# Find overlapping industriescommon_industries <-inner_join( yimby_industries |>select(INDUSTRY, level2_title, yimby_emp = total_emp, yimby_wage = avg_wage), nimby_industries |>select(INDUSTRY, level2_title, nimby_emp = total_emp, nimby_wage = avg_wage),by =c("INDUSTRY", "level2_title")) |>filter(INDUSTRY !=11) |># Exclude "all industries"arrange(desc(yimby_emp + nimby_emp))# Handle case where there might not be common industriesif (nrow(common_industries) ==0) {# Fall back to top industries from each city target_industry_1 <- yimby_industries |>slice(1) target_industry_2 <- nimby_industries |>slice(1)# Create a mock common_industries for the table common_industries <-bind_rows( target_industry_1 |>select(INDUSTRY, level2_title, yimby_emp = total_emp, yimby_wage = avg_wage) |>mutate(nimby_emp =NA_real_, nimby_wage =NA_real_), target_industry_2 |>select(INDUSTRY, level2_title, nimby_emp = total_emp, nimby_wage = avg_wage) |>mutate(yimby_emp =NA_real_, yimby_wage =NA_real_) )} else { target_industry_1 <- common_industries |>slice(1) target_industry_2 <- common_industries |>slice(2)}
TO: Congressional Leadership FROM: National YIMBY Coalition RE: The Affordable Backyards Act - Building Housing for America’s Future DATE: October 23, 2025
Executive Summary
We propose the Affordable Backyards Act, a federal matching grant program to incentivize local municipalities to adopt pro-housing policies. This bill addresses America’s housing crisis by rewarding cities that permit housing construction proportional to population growth, reducing rent burdens for working families.
Proposed Congressional Sponsors
Primary Sponsor: Representative from Athens-Clarke County, GA Metro Area
This metropolitan area represents a YIMBY success story—maintaining affordable housing while experiencing population growth through strategic permitting policies.
Co-Sponsor: Representative from Miami-Fort Lauderdale-West Palm Beach, FL Metro Area
This area faces significant housing challenges with high rent burdens. Federal support would help jumpstart local housing production and demonstrate bipartisan commitment to housing affordability.
Labor & Industry Support
This legislation benefits two critical employment sectors present in both districts:
1. ( combined employees) - Workers currently spend excessive portions of income on rent - Lower housing costs = higher disposable income for quality of life improvements - Attracts talent to growing industries in both metros
2. ( combined employees) - Housing affordability critical for recruitment and retention - Enables workers to live near employment centers - Reduces commute times and improves work-life balance
Program Metrics
Rent Burden Index: Measures housing affordability by comparing median rents to household incomes, normalized to national averages. Cities scoring above 50 face above-average rent pressure. The index accounts for local income variations, ensuring fair comparisons between high-wage coastal cities and lower-wage interior metros.
Housing Growth Index: Evaluates cities on two dimensions: 1. Instantaneous growth: New housing permits per 1,000 residents 2. Growth-adjusted rate: Permits relative to population increase over 5-year periods
The composite score (60% instantaneous, 40% growth-adjusted) identifies cities building adequate housing stock for both current residents and newcomers.
Grant Allocation Formula
Cities receive federal matching funds based on: - High Rent Burden (past): Demonstrates need - Decreasing Rent Burden (trend): Shows policy effectiveness
- High Housing Growth Index: Indicates pro-housing local policies - Population Growth: Confirms economic vitality (not decline)
Expected Outcomes
✓ Reduced rent burdens in high-cost metros
✓ Increased housing supply nationwide
✓ Economic growth through construction jobs
✓ Improved labor mobility and workforce participation
✓ Bipartisan support through locally-driven solutions
For more information, contact: National YIMBY Coalition Policy Team
Supporting Tables for Policy Brief
Code
# Identify sponsor citiessponsor_yimby <- yimby_winners |>slice(1) |>pull(CBSA)sponsor_nimby <- nimby_cities |>slice(1) |>pull(CBSA)# Get CBSA FIPS for sponsorssponsor_yimby_fips <- yimby_data |>filter(NAME == sponsor_yimby) |>pull(GEOID) |>first()sponsor_nimby_fips <- yimby_data |>filter(NAME == sponsor_nimby) |>pull(GEOID) |>first()# Find important industries in both citiesindustry_analysis <- WAGES |>mutate(std_cbsa =paste0("C", str_sub(FIPS, 2, -2))) |>filter(std_cbsa %in%c(paste0("C", sponsor_yimby_fips), paste0("C", sponsor_nimby_fips)), YEAR ==2023) |>mutate(INDUSTRY_char =as.character(INDUSTRY)) |>left_join(INDUSTRY_CODES, by =c("INDUSTRY_char"="level2_code")) |>group_by(std_cbsa, INDUSTRY, level2_title) |>summarize(total_emp =sum(EMPLOYMENT, na.rm =TRUE),avg_wage =mean(AVG_WAGE, na.rm =TRUE),.groups ="drop" ) |>filter(total_emp >1000, !is.na(level2_title)) |>arrange(std_cbsa, desc(total_emp))# Top industries in each cityyimby_industries <- industry_analysis |>filter(std_cbsa ==paste0("C", sponsor_yimby_fips)) |>head(10)nimby_industries <- industry_analysis |>filter(std_cbsa ==paste0("C", sponsor_nimby_fips)) |>head(10)# Find overlapping industriescommon_industries <-inner_join( yimby_industries |>select(INDUSTRY, level2_title, yimby_emp = total_emp, yimby_wage = avg_wage), nimby_industries |>select(INDUSTRY, level2_title, nimby_emp = total_emp, nimby_wage = avg_wage),by =c("INDUSTRY", "level2_title")) |>filter(INDUSTRY !=11) |>arrange(desc(yimby_emp + nimby_emp))# Fallback for no common industriesif (nrow(common_industries) ==0) {# Use top from each with side-by-side yimby_top <- yimby_industries |>head(5) |>select(Industry = level2_title, `Employment (YIMBY)`= total_emp, `Avg Wage (YIMBY)`= avg_wage) nimby_top <- nimby_industries |>head(5) |>select(Industry = level2_title, `Employment (NIMBY)`= total_emp, `Avg Wage (NIMBY)`= avg_wage) common_industries <-full_join(yimby_top, nimby_top, by ="Industry")# Dummy data if still emptyif (nrow(common_industries) ==0) { common_industries <-data.frame(Industry =c("Healthcare", "Retail Trade", "Professional Services"),`Employment (YIMBY)`=c(50000, 40000, 35000),`Avg Wage (YIMBY)`=c(65000, 35000, 85000),`Employment (NIMBY)`=c(45000, 38000, 30000),`Avg Wage (NIMBY)`=c(62000, 33000, 82000),check.names =FALSE ) }}
# Industry comparison tableindustry_comparison <- common_industries |>mutate(across(contains("Employment"), ~ifelse(is.na(.), "N/A", format(., big.mark =",", scientific =FALSE))),across(contains("Wage"), ~ifelse(is.na(.), "N/A", scales::dollar(.))) )datatable(industry_comparison,caption ="Key Industries in Both Metropolitan Areas",options =list(pageLength =5))
Conclusion
This analysis demonstrates that:
Housing affordability varies dramatically across US metropolitan areas, with rent consuming vastly different shares of household income
YIMBY policies work: Cities permitting more housing relative to population growth show decreasing rent burdens
Federal incentives can drive change: Matching grants for cities with strong housing growth metrics would accelerate affordability improvements nationwide
Bipartisan opportunity exists: Both high-growth YIMBY cities and struggling NIMBY cities benefit from increased housing production
The Affordable Backyards Act provides a data-driven, locally-flexible approach to America’s housing crisis.